This notebook can be divided into 3 parts where after running them one time, they can all be run separately. The first one deals with preparation of the data such as normalization and getting rid of batch effects. Seurat objects prepared thanks to those operations are then saved as rds files. The second one looks for differentially expressed genes (DEGs) and saves them to csv files. The last one creates multitude of plots which try to capture the nature of analyzed data.
counts<-read.table("data/scRNA_rawCounts.tsv",header= TRUE, sep = "\t")
meta<- read.table("data/scRNA_metadata.tsv",header= TRUE, sep = "\t")
supp<- read.csv("data/supp_data.csv" ,header= TRUE, sep = ";", stringsAsFactors=FALSE)
meta_changed<- meta[,c(1:3,8, 9)]
meta_changed$patient<- as.character(meta_changed$patient)
rownames(meta_changed)<- meta_changed$sampleID
colnames(meta_changed)[c(2,3,5)]<- c("Batch", "Patient","Diagnosis")
meta_changed$Diagnosis<- as.character(meta_changed$Diagnosis)
meta_changed$Diagnosis<- ifelse(meta_changed$Diagnosis == "ct", "Control", meta_changed$Diagnosis)
meta_changed2<- meta_changed[meta_changed$Patient != "AD-un", ]
meta_changed2<- meta_changed2[meta_changed2$Patient != "Ct-un", ]
supp_changed <- supp
supp_changed$patient<- c("AD1","AD2", "AD3", "AD4" ,"AD5", "AD6" , "Ct1", "Ct2", "Ct3", "Ct4", "Ct5", "Ct6")
supp_changed2<- supp_changed[!is.na(supp_changed$ApoE), c(9,2:6)]
colnames(supp_changed2)[c(1,3)]<- c("Patient", "Sex")
# Removing patients with ApoE different than 33 or 34
supp_changed3<- supp_changed2[supp_changed2$ApoE != "24", ]
supp_changed3<- supp_changed3[supp_changed3$ApoE != "44", ]
meta_changed2<- meta_changed2[meta_changed2$Patient %in% supp_changed3$Patient, ]
counts2<- counts
rownames(counts2)<- counts2$geneName
counts2$geneName<- NULL
meta_changed3<- dplyr::inner_join(meta_changed2, supp_changed3[,c(1:4,6)], by="Patient")
rownames(meta_changed3)<- meta_changed3$sampleID
meta_changed3$sampleID<- NULL
meta_changed3$TAG<- rownames(meta_changed3)
meta_changed3$ApoE %<>% factor
meta_changed3$Sex %<>% factor
meta_changed3$Batch %<>% factor
counts3<- counts2[,colnames(counts2) %in% rownames(meta_changed3)]
grub <- CreateSeuratObject(counts = counts3, project = "grub", min.cells = 3, min.features = 200, meta.data = meta_changed3)
grub[["percent.mt"]] <- PercentageFeatureSet(grub, pattern = "^MT-")
# Visualize the data
VlnPlot(grub, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(grub, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(grub, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 +plot2
# There is no need to filter cells because all of the data is in an acceptable range
range(grub$nFeature_RNA)
## [1] 274 1629
range(grub$percent.mt)
## [1] 0.000000 9.952324
# Normalize data with default settings
grub<- NormalizeData(object = grub)
# Find the most variable genes
grub <- FindVariableFeatures(object = grub, selection.method = "vst", nfeatures = 2000, x.low.cutoff = 0.0125, x.high.cutoff = 3, y.cutoff = 0.5)
top10_varfeatures <- head(VariableFeatures(grub), 10)
plot1 <- VariableFeaturePlot(grub)
plot2 <- LabelPoints(plot = plot1, points = top10_varfeatures, repel = TRUE)
plot1 + plot2
all.genes <- rownames(grub)
grub <- ScaleData(object = grub, features = all.genes)
grub <- RunPCA(object = grub, features = VariableFeatures(object = grub))
ElbowPlot(grub, ndims = 50)
grub <- RunUMAP(grub, dims = 1:10)
grub <- RunTSNE(grub, dims = 1:10)
dittoDimPlot( object = grub, "Batch",reduction = "pca", color.panel = dittoColors()[c(39,15,7,36,25,17)])
dittoDimPlot( object = grub, "Batch",reduction = "umap" ,color.panel = dittoColors()[c(39,15,7,36,25,17)])
dittoDimPlot( object = grub, "Batch",reduction = "tsne" ,color.panel = dittoColors()[c(39,15,7,36,25,17)])
dittoDimPlot( object = grub, "cellType",reduction = "pca")
dittoDimPlot( object = grub, "cellType",reduction = "umap")
dittoDimPlot( object = grub, "cellType",reduction = "tsne")
DimHeatmap(grub, dims = 1:15, cells = 500, balanced = TRUE)
grub.list <- SplitObject(grub, split.by = "Batch")
for (i in 1:length(grub.list)) {
grub.list[[i]] <- SCTransform(grub.list[[i]], verbose = FALSE)
}
grub.features <- SelectIntegrationFeatures(object.list = grub.list, nfeatures = 3000)
options(future.globals.maxSize = 1000 * 1024^3)
grub.list <- PrepSCTIntegration(object.list = grub.list, anchor.features = grub.features, verbose = FALSE)
grub.anchors <- FindIntegrationAnchors(object.list = grub.list, normalization.method = "SCT", anchor.features = grub.features, verbose = FALSE)
grub.integrated <- IntegrateData(anchorset =grub.anchors, normalization.method = "SCT", verbose = FALSE)
DefaultAssay(grub.integrated) <- "integrated"
grub.integrated <- RunPCA(grub.integrated, verbose = FALSE)
ElbowPlot(grub.integrated, ndims =50)
grub.integrated <- RunUMAP(grub.integrated, dims = 1:10)
grub.integrated <- RunTSNE(grub.integrated, dims = 1:10)
#QC plots
p1 <- DimPlot(grub.integrated, reduction = "umap", group.by = "Batch")
p2 <- DimPlot(grub.integrated, reduction = "umap", group.by = "Diagnosis")
plot_grid(p1, p2)
VlnPlot(grub.integrated, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
all.genes <- rownames(grub.integrated)
brain_markers<- markers_df_human_brain
brain_markers<- brain_markers[brain_markers$markers %in% all.genes,]
slice <- dplyr::slice
brain_markers2<- brain_markers %>%
group_by(cell) %>%
slice(1:200)
neu_markers<- list(brain_markers2$markers[brain_markers2$cell == "neu"])
end_markers<- list(brain_markers2$markers[brain_markers2$cell == "end"])
ast_markers<- list(brain_markers2$markers[brain_markers2$cell == "ast"])
mic_markers<- list(brain_markers2$markers[brain_markers2$cell == "mic"])
oli_markers<- list(brain_markers2$markers[brain_markers2$cell == "oli"])
opc_markers<- list(brain_markers2$markers[brain_markers2$cell == "opc"])
grub <- grub.integrated
DefaultAssay(grub) <- "SCT"
grub<-AddModuleScore(object=grub, features = neu_markers, pool = NULL, nbin = 24, ctrl = 100, k = FALSE, assay = NULL, name = 'neu', seed = 1, search = FALSE)
grub<-AddModuleScore(object=grub, features = end_markers, pool = NULL, nbin = 24, ctrl = 100, k = FALSE, assay = NULL, name = 'end', seed = 1, search = FALSE)
grub<-AddModuleScore(object=grub, features = ast_markers, pool = NULL, nbin = 24, ctrl = 100, k = FALSE, assay = NULL, name = 'ast', seed = 1, search = FALSE)
grub<-AddModuleScore(object=grub, features = mic_markers, pool = NULL, nbin = 24, ctrl = 100, k = FALSE, assay = NULL, name = 'mic', seed = 1, search = FALSE)
grub<-AddModuleScore(object=grub, features = oli_markers, pool = NULL, nbin = 24, ctrl = 100, k = FALSE, assay = NULL, name = 'oli', seed = 1, search = FALSE)
grub<-AddModuleScore(object=grub, features = opc_markers, pool = NULL, nbin = 24, ctrl = 100, k = FALSE, assay = NULL, name = 'opc', seed = 1, search = FALSE)
dittoDimPlot( object = grub, "neu1",reduction = "umap")
dittoDimPlot( object = grub, "end1",reduction = "umap")
dittoDimPlot( object = grub, "ast1",reduction = "umap")
dittoDimPlot( object = grub, "mic1",reduction = "umap")
dittoDimPlot( object = grub, "oli1",reduction = "umap")
dittoDimPlot( object = grub, "opc1",reduction = "umap")
grub_new_meta<- grub@meta.data
# cell_type based on maximum score
grub_new_meta$cell_type<- names(grub_new_meta[, 16:21])[apply(grub_new_meta[, 16:21],1,which.max)]
table(grub_new_meta$cell_type)
##
## ast1 end1 mic1 neu1 oli1 opc1
## 2169 67 399 761 5615 966
# Max and second max score
grub_new_meta$x1<- apply(grub_new_meta[, 16:21], 1, max)
grub_new_meta$x2<- apply(grub_new_meta[, 16:21], 1, function(x) x[order(x)[5]])
# Get the cell type value = (x1-x2)/x1
grub_new_meta$x1_x2<- (grub_new_meta$x1 - grub_new_meta$x2)/grub_new_meta$x1
# If the value is < 0.2 then this cell is a hybrid
grub_new_meta$x1_x2<- ifelse(grub_new_meta$x1_x2 < 0.2, 1,0)
grub_new_meta$cell_type2<- grub_new_meta$cell_type
grub_new_meta$cell_type2<- ifelse(grub_new_meta$x1_x2 == 1, "hybrid", grub_new_meta$cell_type2 )
table(grub_new_meta$cell_type2)
##
## ast1 end1 hybrid mic1 neu1 oli1 opc1
## 2095 60 345 392 688 5519 878
# Remove numbers from cell types
grub_new_meta$cell_type<- gsub("1", "", grub_new_meta$cell_type)
grub_new_meta$cell_type2<- gsub("1", "", grub_new_meta$cell_type2)
write.csv(grub_new_meta, file="Cell_id_scores.csv")
grub@meta.data<- grub_new_meta
# Tables showing differences beetween cell type assignment
table(grub_new_meta$cellType, grub_new_meta$cell_type)
##
## ast end mic neu oli opc
## astro 1723 0 0 0 0 0
## doublet 107 7 14 121 14 90
## endo 7 56 0 0 0 0
## mg 1 0 339 0 0 0
## neuron 1 0 0 454 0 0
## oligo 17 0 1 11 5375 24
## OPC 3 0 0 9 0 779
## unID 310 4 45 166 226 73
# Grubman et. all vs Belonwu et. all
table(grub_new_meta$cellType, grub_new_meta$cell_type2)
##
## ast end hybrid mic neu oli opc
## astro 1723 0 0 0 0 0 0
## doublet 92 5 65 10 107 5 69
## endo 5 52 6 0 0 0 0
## mg 1 0 1 338 0 0 0
## neuron 1 0 1 0 453 0 0
## oligo 11 0 54 1 2 5346 14
## OPC 1 0 46 0 0 0 744
## unID 261 3 172 43 126 168 51
table(grub_new_meta$cell_type, grub_new_meta$cell_type2)
##
## ast end hybrid mic neu oli opc
## ast 2095 0 74 0 0 0 0
## end 0 60 7 0 0 0 0
## mic 0 0 7 392 0 0 0
## neu 0 0 73 0 688 0 0
## oli 0 0 96 0 0 5519 0
## opc 0 0 88 0 0 0 878
dittoDimPlot( 'cellType', object = grub, reduction = "umap", color.panel = c("#E69F00" ,"#dd1c77","#56B4E9" ,"#009E73", "#F0E442","#D55E00", "#0072B2","#666666" ))
dittoDimPlot( 'cell_type', object = grub, reduction = "umap", color.panel = c("#E69F00" ,"#56B4E9" ,"#009E73", "#F0E442", "#D55E00" , "#0072B2"))
dittoDimPlot( 'cell_type2', object = grub, reduction = "umap", color.panel = c("#E69F00" ,"#56B4E9" ,"#dd1c77" ,"#009E73", "#F0E442", "#D55E00", "#0072B2"))
dittoBarPlot(grub, "cellType", group.by = "Patient",color.panel = c("#E69F00" ,"#dd1c77","#56B4E9" ,"#009E73", "#F0E442","#D55E00", "#0072B2","#666666" ), main="Grubman et. all") +theme(title = element_text(size=25, vjust=0.5))
dittoBarPlot(grub, "cell_type", group.by = "Patient",color.panel = c("#E69F00" ,"#56B4E9" ,"#009E73", "#F0E442", "#D55E00" , "#0072B2"))
dittoBarPlot(grub, "cell_type2", group.by = "Patient",color.panel = c("#E69F00" ,"#56B4E9" , "#009E73", "#dd1c77", "#F0E442", "#D55E00", "#0072B2"), main="Belonwu et. all method") +theme(title = element_text(size=25, vjust=0.5))
# Save the object with new labels
saveRDS(grub, file = "grub_new_labels_full.rds")
grub_no_hybrids <- subset(grub, subset = cell_type2 != "hybrid")
saveRDS(grub_no_hybrids, file = "grub_new_label_no_hybrid.rds")
# Clear the memory
rm(list = ls(all.names = TRUE))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 7305675 390.2 12010500 641.5 12010500 641.5
## Vcells 161576527 1232.8 1017094440 7759.9 1269889629 9688.5
library(readxl)
library(SingleCellExperiment)
library(scater)
library(scran)
library(limma)
library(edgeR)
library(Seurat)
grub_dge<- readRDS("grub_new_label_no_hybrid.rds")
grub_dge$diag_apoe_cell<- paste0(grub_dge$Diagnosis,"_", grub_dge$ApoE,"_",grub_dge$cell_type)
Idents(grub_dge) <- "diag_apoe_cell"
DefaultAssay(grub_dge)<- "SCT"
grub_dge$diag_apoe_cell<- as.factor(grub_dge$diag_apoe_cell)
grub_dge$Batch<- as.factor(grub_dge$Batch)
#Create design
design<- model.matrix(~0 + diag_apoe_cell+ Sex, data = grub_dge@meta.data)
colnames(design) <- gsub("diag_apoe_cell", "", colnames(design))
colnames(design) <- gsub("Sex", "", colnames(design))
# Filter and normalize
dge <- as.matrix(GetAssayData(grub_dge, slot="counts"))
dge <- DGEList(counts= dge)
keep <- filterByExpr(dge, design)
summary(keep)
## Mode FALSE TRUE
## logical 10648 197
dge <- dge[keep, ,keep.lib.sizes=FALSE]
dge <- calcNormFactors(dge, method = "TMMwsp")
vm<- voom(dge, design)
fit <- lmFit(vm, design)
contrasts.matrix<- makeContrasts(ast33= AD_33_ast - Control_33_ast, ast34 = AD_34_ast - Control_34_ast, mic33= AD_33_mic - Control_33_mic, mic34= AD_34_mic - Control_34_mic, neu33= AD_33_neu - Control_33_neu, neu34= AD_34_neu - Control_34_neu, oli33= AD_33_oli - Control_33_oli, oli34= AD_34_oli - Control_34_oli, opc33= AD_33_opc - Control_33_opc, opc34= AD_34_opc - Control_34_opc, levels = colnames(design))
fit <- contrasts.fit(fit, contrasts = contrasts.matrix)
fit <- eBayes(fit)
grub_dge$cell_apoe<- paste0(grub_dge$cell_type,grub_dge$ApoE)
unique(grub_dge$cell_apoe)
## [1] "oli34" "ast34" "end34" "neu34" "mic34" "opc34" "oli33" "ast33" "opc33"
## [10] "mic33" "neu33" "end33"
delist_key<- c("ast33", "ast34", "mic33", "mic34",
"neu33" ,"neu34", "oli33" ,"oli34", "opc33" , "opc34" )
library(stringr)
markers2= NULL
markers3= NULL
for(key in delist_key){
apoe<- str_sub(key,-2,-1)
c_type<- gsub(apoe,"",key)
markers<- topTable(fit, coef= key, sort.by = "P", number = Inf, adjust.method = "BH")
markers$group<- key
markers$ApoE<- apoe
markers$cell_type<- c_type
markers$gene<- rownames(markers)
markers$dir<- ifelse(markers$logFC < 0, "neg","pos")
colnames( markers)[c(1,4,5)] <- c("avg_logFC", "p_val", "p_val_adj")
markers2<- rbind(markers2, markers)
markers<- subset(markers, p_val_adj < 0.05 & abs(avg_logFC) > 0.25)
markers3<- rbind(markers3, markers)
}
table(markers3$group, markers3$dir)
##
## neg pos
## ast33 85 20
## ast34 41 123
## mic33 43 18
## mic34 15 135
## neu33 91 10
## neu34 33 97
## oli33 103 20
## oli34 49 112
## opc33 48 15
## opc34 53 113
de33_markersOG<- markers2[markers2$ApoE == "33",]
de34_markersOG<- markers2[markers2$ApoE == "34",]
write.csv(de33_markersOG, file="grub_DEG33.csv")
write.csv(de34_markersOG, file="grub_DEG34.csv")
# Clear the memory
rm(list = ls(all.names = TRUE))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 8030649 428.9 12010500 641.5 12010500 641.5
## Vcells 162697536 1241.3 813675552 6207.9 1269889629 9688.5
library(dittoSeq)
library(dplyr)
library(UpSetR)
library(Seurat)
library(pheatmap)
library(RColorBrewer)
library(Hmisc)
library(stringr)
library(grid)
library(ComplexHeatmap)
grub<- readRDS( "grub_new_label_no_hybrid.rds")
meta<- read.table("data/scRNA_metadata.tsv",header= TRUE, sep = "\t")
supp<- read.csv("data/supp_data.csv" ,header= TRUE, sep = ";", stringsAsFactors=FALSE)
DefaultAssay(grub) <- "SCT"
grub$Braak<- ifelse(grub$Diagnosis == "AD", "6", "NR")
grub$Plaque<- ifelse(grub$Diagnosis == "AD", "Numerous diffuse and neuritic Abeta plaque", "None")
grub$Plaque<- ifelse(grub$Patient == "Ct1", "Occasional diffuse plaque in cortex", grub$Plaque)
grub$CERAD<- ifelse(grub$Plaque == "Numerous diffuse and neuritic Abeta plaque", "1", "4") #1=definite, 4=No AD
grub$CERAD<- ifelse(grub$Plaque == "Occasional diffuse plaque in cortex", "3",grub$CERAD) #3=possible, 2=probable
grub$CERAD<- as.factor(grub$CERAD)
grub$Braak<- as.factor(grub$Braak)
library(Hmisc)
grub$cell_type<- capitalize(grub$cell_type)
grub$ApoE<- ifelse(grub$ApoE == "33", "3/3", grub$ApoE)
grub$ApoE<- ifelse(grub$ApoE == "34", "3/4", grub$ApoE)
grub$diag_cell_apoe<- paste0(grub$Diagnosis,"_", grub$cell_type, "_", grub$ApoE)
grub0<-readRDS("grub_original.rds")
grub0 <- subset(grub0, subset = TAG %in% grub$TAG)
DefaultAssay(grub0)<- "RNA"
grub0@meta.data<-dplyr::inner_join(grub0@meta.data, grub@meta.data[,c(12,22,26:29)], by="TAG")
grub0$cell_type<- capitalize(grub0$cell_type)
grub0$ApoE<- ifelse(grub0$Patient == "AD1", "3/4", grub0$ApoE)
grub0$ApoE<- ifelse(grub0$Patient == "AD2", "3/4", grub0$ApoE)
grub0$ApoE<- ifelse(grub0$Patient == "AD4", "3/3", grub0$ApoE)
grub0$ApoE<- ifelse(grub0$Patient == "AD6", "3/4", grub0$ApoE)
grub0$ApoE<- ifelse(grub0$Patient == "Ct1", "3/3", grub0$ApoE)
grub0$ApoE<- ifelse(grub0$Patient == "Ct2", "3/3", grub0$ApoE)
grub0$ApoE<- ifelse(grub0$Patient == "Ct3", "3/3", grub0$ApoE)
grub0$ApoE<- ifelse(grub0$Patient == "Ct4", "3/4", grub0$ApoE)
grub0$ApoE<- ifelse(grub0$Patient == "Ct5", "3/3", grub0$ApoE)
#Diagnosis
dittoDimPlot( object = grub, "Diagnosis",reduction = "umap", cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"], legend.title = "Diagnosis",main = "Diagnosis", color.panel = c("#007756", "#D5C711")) + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Sex
dittoDimPlot( object = grub, "Sex", reduction = "umap", cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"],color.panel = c("#A04700", "#005685" ), legend.title = "Sex", main = "Sex" ) + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#ApoE
dittoDimPlot( object = grub, "ApoE", reduction = "umap", cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"],color.panel =c( "#AD7700", "#1C91D4","#9AD2F2"), legend.title = "APOE", main = "APOE") + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Batch
dittoDimPlot( object = grub, "Batch", reduction = "umap", cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"],color.panel = dittoColors()[c(39,15,7,36,25,17)], legend.title = "Batch", main = "Batch") + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Celltype
table(grub$cell_type)
##
## Ast End Mic Neu Oli Opc
## 2095 60 392 688 5519 878
dittoDimPlot( object = grub, "cell_type", reduction = "umap", do.label= TRUE,labels.size = 14, cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"], main= "Cell type",color.panel = c("#E69F00","#0072B2", "aquamarine2","#D55E00","#CC79A7")) + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(), legend.position="none", axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Patient
table(grub$Patient)
##
## AD1 AD2 AD4 AD6 Ct1 Ct2 Ct3 Ct4 Ct5
## 1483 1354 1338 437 657 279 1921 996 1167
dittoDimPlot( object = grub, "Patient", reduction = "umap", cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"], legend.title = "Subject", main = "Subject") + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#CERAD
dittoDimPlot( object = grub, "CERAD", reduction = "umap", cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"], legend.title = "CERAD", color.panel = c(dittoColors()[20],dittoColors()[22],dittoColors()[23]), main = "CERAD") + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Diagnosis
dittoDimPlot( object = grub0, "Diagnosis",reduction = "umap", cells.use = colnames( grub0)[grub0$cell_type != "End"& grub0$ApoE != "44"], legend.title = "Diagnosis",main = "Diagnosis", color.panel = c("#007756", "#D5C711")) + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Sex
dittoDimPlot( object = grub0, "Sex", reduction = "umap", cells.use = colnames( grub0)[grub0$cell_type != "End"& grub0$ApoE != "44"],color.panel = c("#A04700", "#005685" ), legend.title = "Sex",main = "Sex" ) + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#ApoE
dittoDimPlot( object = grub0, "ApoE", reduction = "umap", cells.use = colnames( grub0)[grub0$cell_type != "End"& grub0$ApoE != "44"],color.panel =c( "#AD7700", "#1C91D4","#9AD2F2"), legend.title = "APOE", main = "APOE") +theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Batch
dittoDimPlot( object = grub0, "Batch", reduction = "umap", cells.use = colnames( grub0)[grub0$cell_type != "End"& grub0$ApoE != "44"],color.panel = dittoColors()[c(39,15,7,36,25,17)], legend.title = "Batch", main = "Batch")+ theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Celltype
dittoDimPlot( object = grub0, "cell_type", reduction = "umap", do.label= TRUE,labels.size = 14, cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"], main= "Cell type",color.panel = c("#E69F00","#0072B2", "aquamarine2","#D55E00","#CC79A7")) + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(), legend.position="none", axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Patient
table(grub$Patient)
##
## AD1 AD2 AD4 AD6 Ct1 Ct2 Ct3 Ct4 Ct5
## 1483 1354 1338 437 657 279 1921 996 1167
dittoDimPlot( object = grub0, "Patient", reduction = "umap", cells.use = colnames( grub0)[grub0$cell_type != "End" & grub0$ApoE != "44"], legend.title = "Subject", main = "Subject") + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
#Plaque
dittoDimPlot( object = grub0, "CERAD", reduction = "umap", cells.use = colnames( grub0)[grub0$cell_type != "End" & grub0$ApoE != "44"], legend.title = "CERAD", color.panel = c(dittoColors()[20],dittoColors()[22],dittoColors()[23]), main = "CERAD") + theme(axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(),legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.background = element_rect(colour = "black", size=1.5))
de33_markers_all<- read.csv("grub_DEG33.csv",header= TRUE, sep = ",", stringsAsFactors=FALSE)
de34_markers_all<- read.csv("grub_DEG34.csv",header= TRUE, sep = ",", stringsAsFactors=FALSE)
de33_markers_all$X <- NULL
de34_markers_all$X<- NULL
#remove MALAT1- reported to be an artifact by Grubman et. all
de33_markers_all<-de33_markers_all[de33_markers_all$gene != "MALAT1",]
de34_markers_all<- de34_markers_all[de34_markers_all$gene != "MALAT1",]
de33_markers<- subset(de33_markers_all, p_val_adj < 0.05 & abs(avg_logFC) > 0.25)
de34_markers<- subset(de34_markers_all, p_val_adj < 0.05 & abs(avg_logFC) > 0.25)
all_de_num<- rbind(as.data.frame(table(de33_markers$group, de33_markers$dir)),as.data.frame(table(de34_markers$group, de34_markers$dir)))
all_de_num$Freq<- ifelse(all_de_num$Var2 == "neg", all_de_num$Freq * -1,all_de_num$Freq)
all_de_num$ApoE<- str_sub(all_de_num$Var1,-2,-1)
all_de_num$Celltype<- gsub(all_de_num$ApoE,"",all_de_num$Var1)
all_de_num$Celltype<- gsub(34,"",all_de_num$Celltype)
table(de33_markers$group, de33_markers$dir)
##
## neg pos
## ast33 85 19
## mic33 43 17
## neu33 91 9
## oli33 103 19
## opc33 48 14
table(de34_markers$group, de34_markers$dir)
##
## neg pos
## ast34 40 123
## mic34 14 135
## neu34 32 97
## oli34 48 112
## opc34 52 113
library(Hmisc)
all_de_num$Celltype<- capitalize(all_de_num$Celltype)
ggplot(all_de_num, aes(x = Celltype, y = Freq))+ geom_col(aes(fill = ApoE), position = "dodge", width = 0.5) + xlab("Cell type") + ylab("Number of DE genes") + coord_flip() + scale_fill_manual(values = c("#AD7700" ,"#1C91D4")) + scale_y_continuous(labels = function(x) abs(x), n.breaks = 8) + theme_bw() + theme(legend.text = element_text( size = 20), legend.title = element_text( size = 20), axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_text(size=20),panel.border = element_blank(),panel.grid.major = element_blank(),panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + geom_hline(yintercept = 0,lty=2) + expand_limits(y= 150)
marktype<- c("Ast", "Neu", "Mic", "Oli" , "Opc")
for (i in marktype){
p1= print(paste0('table(',i,'_markers$sig)'),sep='', quote=FALSE)
eval(expr = parse(text = c(p1)))
}
## [1] table(Ast_markers$sig)
## [1] table(Neu_markers$sig)
## [1] table(Mic_markers$sig)
## [1] table(Oli_markers$sig)
## [1] table(Opc_markers$sig)
#explore range of log fold changes
for (i in marktype){
p1= print(paste0('hist(',i,'_markers$avg_logFC.x)'),sep='', quote=FALSE)
p2= print(paste0('hist(',i,'_markers$avg_logFC.y)'),sep='', quote=FALSE)
eval(expr = parse(text = c(p1,p2)))
}
## [1] hist(Ast_markers$avg_logFC.x)
## [1] hist(Ast_markers$avg_logFC.y)
## [1] hist(Neu_markers$avg_logFC.x)
## [1] hist(Neu_markers$avg_logFC.y)
## [1] hist(Mic_markers$avg_logFC.x)
## [1] hist(Mic_markers$avg_logFC.y)
## [1] hist(Oli_markers$avg_logFC.x)
## [1] hist(Oli_markers$avg_logFC.y)
## [1] hist(Opc_markers$avg_logFC.x)
## [1] hist(Opc_markers$avg_logFC.y)
#assign colors
my_col <- as.character(Ast_markers$sigcols)
names(my_col) <- as.character(Ast_markers$sig)
# Plots
ggplot(Ast_markers, aes(x= avg_logFC.x, y=avg_logFC.y, color= sig)) + geom_point(size=0.5, show.legend = FALSE) + labs(title="Astrocytes", color="Adjusted p-value (BH)") + xlab("APOE3/3 AD vs non-AD Log2 FC") + ylab("APOE3/4 AD vs non-AD Log2 FC") + theme_bw() + scale_y_continuous(breaks = seq(-2,4, 1)) + scale_x_continuous(breaks = seq(-2.5,2.5, 0.5)) + theme(panel.border = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + geom_vline(xintercept = 0) +geom_hline(yintercept = 0)+ scale_color_manual(values=my_col) +geom_text(data=subset(Ast_markers, sig2 == "1" & abs(avg_logFC.x) > 0.25 |sig2 == "1" & abs(avg_logFC.y) > 0.25), size=7, check_overlap= TRUE, aes(label = gene))+ theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_text(size=30, hjust=0.5)) +expand_limits(x= -1.25) + expand_limits(x= 1) + geom_vline(xintercept=c(-0.25,0.25), linetype="dotted") + geom_hline(yintercept=c(-0.25,0.25), linetype="dotted") + guides(color = guide_legend(override.aes = list(size=5)),text = guide_legend(label = FALSE)) + theme(legend.position = "none")
ggplot(Mic_markers, aes(x= avg_logFC.x, y=avg_logFC.y, color= sig)) + geom_point(size=0.5) + labs(title="Microglia", color="Adjusted p-value (BH)") + xlab("APOE3/3 AD vs non-AD Log2 FC") + ylab("APOE3/4 AD vs non-AD Log2 FC") + theme_bw() + scale_y_continuous(breaks = seq(-3,3, 1)) + scale_x_continuous(breaks = seq(-1,1, 0.5)) + theme(panel.border = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + geom_vline(xintercept = 0) +geom_hline(yintercept = 0) + scale_color_manual(values= my_col) + geom_text(data=subset(Mic_markers, sig2 == "1" & abs(avg_logFC.x) > 0.25 |sig2 == "1" & abs(avg_logFC.y) > 0.25), size=7, check_overlap= TRUE, aes(label = gene)) + theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_text(size=30, hjust=0.5)) +expand_limits(x= -1.25) + expand_limits(x= 1) + geom_vline(xintercept=c(-0.25,0.25), linetype="dotted") + geom_hline(yintercept=c(-0.25,0.25), linetype="dotted") + guides(color = guide_legend(override.aes = list(size=5)),text = guide_legend(label = FALSE))+ theme(legend.position = "none")
ggplot(Neu_markers, aes(x= avg_logFC.x, y=avg_logFC.y, color= sig)) + geom_point(size=0.5) + labs(title="Neurons", color="Adjusted p-value (BH)") + xlab("APOE3/3 AD vs non-AD Log2 FC") + ylab("APOE3/4 AD vs non-AD Log2 FC") + theme_bw() + scale_y_continuous(breaks = seq(-2,4, 1)) + scale_x_continuous(breaks = seq(-1,1, 0.5)) + theme(panel.border = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + geom_vline(xintercept = 0) +geom_hline(yintercept = 0) + scale_color_manual(values=my_col) + geom_text(data=subset(Neu_markers, sig2 == "1" & abs(avg_logFC.x) > 0.25 |sig2 == "1" & abs(avg_logFC.y) > 0.25), size=7, check_overlap= TRUE, aes(label = gene)) + theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_text(size=30, hjust=0.5)) +expand_limits(x= -1.25) + expand_limits(x= 1) + geom_vline(xintercept=c(-0.25,0.25), linetype="dotted") + geom_hline(yintercept=c(-0.25,0.25), linetype="dotted") + guides(color = guide_legend(override.aes = list(size=5)),text = guide_legend(label = FALSE))+ theme(legend.position = "none")
ggplot(Oli_markers, aes(x= avg_logFC.x, y=avg_logFC.y, color= sig)) + geom_point(size=0.5) + labs(title="Oligodendrocytes", color="Adjusted p-value (BH)") + xlab("APOE3/3 AD vs non-AD Log2 FC") + ylab("APOE3/4 AD vs non-AD Log2 FC") + theme_bw() + scale_y_continuous(breaks = seq(-2,4, 1)) + scale_x_continuous(breaks = seq(-2,2, 0.5)) + theme(panel.border = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + geom_vline(xintercept = 0) +geom_hline(yintercept = 0) + scale_color_manual(values= my_col) + geom_text(data=subset(Oli_markers, sig2 == "1" & abs(avg_logFC.x) > 0.25 |sig2 == "1" & abs(avg_logFC.y) > 0.25), size=7, check_overlap= TRUE, aes(label = gene)) + theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_text(size=30, hjust=0.5)) +expand_limits(x= -1.25) + expand_limits(x= 1) +
geom_vline(xintercept=c(-0.25,0.25), linetype="dotted") + geom_hline(yintercept=c(-0.25,0.25), linetype="dotted") + guides(color = guide_legend(override.aes = list(size=5)),text = guide_legend(label = FALSE))+ theme(legend.position = "none")
ggplot(Opc_markers, aes(x= avg_logFC.x, y=avg_logFC.y, color= sig)) + geom_point(size=0.5) + labs(title="Oligodendrocyte progenitor cells", color="Adjusted p-value (BH)") + xlab("APOE3/3 AD vs non-AD Log2 FC") + ylab("APOE3/4 AD vs non-AD Log2 FC") + theme_bw() + scale_y_continuous(breaks = seq(-1,3, 1)) + scale_x_continuous(breaks = seq(-2,2, 0.5)) + theme(panel.border = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + geom_vline(xintercept = 0) +geom_hline(yintercept = 0) + scale_color_manual(values= my_col) + geom_text(data=subset(Opc_markers, sig2 == "1" & abs(avg_logFC.x) > 0.25 |sig2 == "1" & abs(avg_logFC.y) > 0.25), size=7, check_overlap= TRUE, aes(label = gene)) + theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_text(size=30, hjust=0.5)) +expand_limits(x= -1.25) + expand_limits(x= 1) +
geom_vline(xintercept=c(-0.25,0.25), linetype="dotted") + geom_hline(yintercept=c(-0.25,0.25), linetype="dotted") + guides(color = guide_legend(override.aes = list(size=5)),text = guide_legend(label = FALSE))
rm(Ast33_markers,Mic33_markers,Ex33_markers,In33_markers,Oli33_markers,Opc33_markers,Ast34_markers,Mic34_markers,Ex34_markers,In34_markers,Oli34_markers,Opc34_markers)
de33_markers<- subset(de33_markers_all, p_val_adj < 0.05 & abs(avg_logFC) > 0.25)
de34_markers<- subset(de34_markers_all, p_val_adj < 0.05 & abs(avg_logFC) > 0.25)
math_markers<- rbind(de33_markers,de34_markers)
de33 <- subset(de33_markers, gene %in% de34_markers$gene)
de34 <- subset(de34_markers, gene %in% de33_markers$gene)
combined <- rbind(de33,de34)
sorted_comb <- combined[order(combined$gene, -abs(combined$avg_logFC)),]
# removing duplicates from the sorted sub column
ans <- sorted_comb[!duplicated(sorted_comb$gene),]
ans_sorted <- ans[order(-abs(ans$avg_logFC)),]
max_genes <- ans_sorted$gene[1:10]
max_genes
## [1] "DOCK4" "LINC00486" "NEAT1" "CRYAB" "FRMD4A" "DPP10"
## [7] "HSPA1A" "CNTNAP2" "ROBO2" "PPP2R2B"
grub$diag_cell<- paste0(grub$Diagnosis,"_",grub$cell_type)
grub$diag_apoe<- paste0(grub$Diagnosis,"_",grub$ApoE)
dittoPlot( grub, var=max_genes[1] , group.by = "diag_apoe", split.by = "cell_type",legend.show = FALSE, x.reorder = c(1,3,2,4), color.panel = c(dittoColors()[36],dittoColors()[29],dittoColors()[28], dittoColors()[26]), split.ncol=5, cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"]) + theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(), strip.text = element_text(size = 20)) + theme(legend.text = element_blank(), legend.title = element_blank(), axis.text.x = element_blank(), axis.title.x=element_blank(), axis.ticks.x = element_blank())
dittoPlot( grub, max_genes[2] , group.by = "diag_apoe", split.by = "cell_type", legend.show = FALSE, x.reorder = c(1,3,2,4), color.panel = c(dittoColors()[36],dittoColors()[29],dittoColors()[28], dittoColors()[26]), split.ncol=5 , cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"]) + theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(), strip.text = element_text(size = 20)) + theme(legend.text = element_blank(), legend.title = element_blank(), axis.text.x = element_blank(), axis.title.x=element_blank(), axis.ticks.x = element_blank())
dittoPlot( grub, max_genes[3] , group.by = "diag_apoe", split.by = "cell_type", legend.show = FALSE, x.reorder = c(1,3,2,4), color.panel = c(dittoColors()[36],dittoColors()[29],dittoColors()[28], dittoColors()[26]), split.ncol=5 , cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"]) + theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(), strip.text = element_text(size = 20)) + theme(legend.text = element_blank(), legend.title = element_blank(), axis.text.x = element_blank(), axis.title.x=element_blank(), axis.ticks.x = element_blank())
dittoPlot( grub, max_genes[4] , group.by = "diag_apoe", split.by = "cell_type", legend.show = FALSE, x.reorder = c(1,3,2,4), color.panel = c(dittoColors()[36],dittoColors()[29],dittoColors()[28], dittoColors()[26]), split.ncol=5 , cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"]) + theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(), strip.text = element_text(size = 20)) + theme(legend.text = element_blank(), legend.title = element_blank(), axis.text.x = element_blank(), axis.title.x=element_blank(), axis.ticks.x = element_blank())
dittoPlot( grub, max_genes[5] , group.by = "diag_apoe", split.by = "cell_type", x.reorder = c(1,3,2,4), color.panel = c(dittoColors()[36],dittoColors()[29],dittoColors()[28], dittoColors()[26]), split.ncol=5 , cells.use = colnames( grub)[grub$cell_type != "End" & grub$ApoE != "44"]) + theme( axis.text=element_text(size=20), axis.title=element_text(size=20), plot.title = element_blank(), strip.text = element_text(size = 20)) + theme(legend.text = element_text( size = 20), legend.title = element_text( "APOE",size = 20), axis.text.x = element_blank(), axis.title.x=element_blank(), axis.ticks.x = element_blank())
de33_markers<- subset(de33_markers_all, p_val_adj < 0.05)
de34_markers<- subset(de34_markers_all, p_val_adj < 0.05)
grub_markers<- rbind(de33_markers,de34_markers)
grub_markers$group<- capitalize(grub_markers$group)
grub_markers<- grub_markers[,c(1,10,7)]
for(ct in unique(grub_markers$group)){
new_col<- print(paste0('grub_markers$',ct,'<- ifelse(grub_markers$group == "',ct,'",grub_markers$avg_logFC, 0)'),sep='',quote = FALSE)
eval(expr = parse(text = new_col))
}
## [1] grub_markers$Ast33<- ifelse(grub_markers$group == "Ast33",grub_markers$avg_logFC, 0)
## [1] grub_markers$Mic33<- ifelse(grub_markers$group == "Mic33",grub_markers$avg_logFC, 0)
## [1] grub_markers$Neu33<- ifelse(grub_markers$group == "Neu33",grub_markers$avg_logFC, 0)
## [1] grub_markers$Oli33<- ifelse(grub_markers$group == "Oli33",grub_markers$avg_logFC, 0)
## [1] grub_markers$Opc33<- ifelse(grub_markers$group == "Opc33",grub_markers$avg_logFC, 0)
## [1] grub_markers$Ast34<- ifelse(grub_markers$group == "Ast34",grub_markers$avg_logFC, 0)
## [1] grub_markers$Mic34<- ifelse(grub_markers$group == "Mic34",grub_markers$avg_logFC, 0)
## [1] grub_markers$Neu34<- ifelse(grub_markers$group == "Neu34",grub_markers$avg_logFC, 0)
## [1] grub_markers$Oli34<- ifelse(grub_markers$group == "Oli34",grub_markers$avg_logFC, 0)
## [1] grub_markers$Opc34<- ifelse(grub_markers$group == "Opc34",grub_markers$avg_logFC, 0)
# Remove duplicates
grub_markers <- grub_markers %>% group_by(gene) %>% mutate(count = n())
grub_markers<- grub_markers %>%
arrange(gene)
grub_markers<- grub_markers %>% # Specify data frame
group_by(gene) %>% # Specify group indicator
summarise_at(vars(colnames(grub_markers[,4:13])), # Specify column
list(name = sum)) # Specify function
grub_markers<- as.data.frame(grub_markers)
colnames(grub_markers)[2:11]<-gsub("_name","",colnames(grub_markers[2:11]))
rownames(grub_markers)<- grub_markers$gene
grub_markers$gene<- NULL
grub_markers<- grub_markers[,c(1,6,2,7,3,8,4,9,5,10)]
anno <- data.frame( Celltype= factor(colnames(grub_markers)))
rownames(anno)<- anno$Celltype
anno$APOE<- str_sub(anno$Celltype, -2,-1)
anno$Celltype<- str_sub(anno$Celltype, 1,3)
rownames(anno)<- gsub(33, "_3/3",rownames(anno))
rownames(anno)<- gsub(34, "_3/4",rownames(anno))
anno$APOE<- gsub(33, "3/3",anno$APOE)
anno$APOE<- gsub(34, "3/4",anno$APOE)
annoc<- list(APOE = c("3/3"= "#AD7700", "3/4" = "#1C91D4"), Celltype=( c("Ast" = "#E69F00", "Neu"= "aquamarine2", "Mic" = "#0072B2", "Oli"="#D55E00","Opc" ="#CC79A7" )))
colnames(grub_markers)<- gsub(33, "_3/3",colnames(grub_markers))
colnames(grub_markers)<- gsub(34, "_3/4",colnames(grub_markers))
rangem <- max(abs(grub_markers));
pheatmap(grub_markers, angle_col= "90", fontsize_row = 12,fontsize_col= 12 , show_rownames = FALSE, show_colnames = FALSE, scale = "none" , border_color = "NA",fontsize= 12,annotation_col= anno, annotation_colors = annoc ,color = colorRampPalette(c("gold3", "white", "blue"))(100), breaks = seq(-rangem, rangem, length.out = 100))
grub2 <- subset(grub, subset= ApoE != "44")
grub2 <- subset(grub2, subset= cell_type != "End")
dittoHeatmap(grub2, c("DOCK4", "LINC00486", "NEAT1", "CRYAB", "FRMD4A", "DPP10", "HSPA1A","CNTNAP2", "ROBO2", "PPP2R2B" ),
annot.by = c( "cell_type", "ApoE"), order.by = c("ApoE", "cell_type"), heatmap.colors =colorRampPalette(c("gold3", "white", "blue"))(100), complex=TRUE, treeheight_row=30, annotation_colors = list(ApoE = c("3/3"= "#AD7700", "3/4" = "#1C91D4"), cell_type=( c("Ast" = "#E69F00", "Neu"= "aquamarine2", "Mic" = "#0072B2", "Oli"="#D55E00","Opc" ="#CC79A7" ))))
library(grid)
de33.list1<-list( Ast= de33_markers$gene[de33_markers$cell_type == "Ast"],
Mic= de33_markers$gene[de33_markers$cell_type == "Mic"],
Neu= de33_markers$gene[de33_markers$cell_type == "Neu"],
Oli= de33_markers$gene[de33_markers$cell_type == "Oli"],
Opc= de33_markers$gene[de33_markers$cell_type == "Opc"])
de33.list2<-list(Ast_up = de33_markers$gene[de33_markers$cell_type == "Ast" & de33_markers$dir == "pos"],
Mic_up = de33_markers$gene[de33_markers$cell_type == "Mic" & de33_markers$dir == "pos"],
Neu_up = de33_markers$gene[de33_markers$cell_type == "Neu" & de33_markers$dir == "pos"],
Oli_up = de33_markers$gene[de33_markers$cell_type == "Oli" & de33_markers$dir == "pos"],
Opc_up = de33_markers$gene[de33_markers$cell_type == "Opc" & de33_markers$dir == "pos"])
de33.list3<-list(Ast_down = de33_markers$gene[de33_markers$cell_type == "Ast" & de33_markers$dir != "pos"],
Mic_down = de33_markers$gene[de33_markers$cell_type == "Mic" & de33_markers$dir != "pos"],
Neu_down = de33_markers$gene[de33_markers$cell_type == "Neu" & de33_markers$dir != "pos"],
Oli_down = de33_markers$gene[de33_markers$cell_type == "Oli" & de33_markers$dir != "pos"],
Opc_down = de33_markers$gene[de33_markers$cell_type == "Opc" & de33_markers$dir != "pos"])
#c(intersection size title, intersection size tick labels, set size title, set size tick labels, set names, numbers above bars).
upset(fromList(de33.list1), sets = c("Opc","Oli", "Neu", "Mic", "Ast" ), keep.order = TRUE,order.by = "freq", mainbar.y.label = "Gene Intersections", nsets=6,sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 2.25))
grid.text("Shared and unique AD vs non-AD DEGs across APOE3/3 cell types",x = 0.54, y=0.967, gp=gpar(fontsize=12))
upset(fromList(de33.list2), sets = c("Opc_up","Oli_up", "Neu_up", "Mic_up", "Ast_up" ), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 2.25))
grid.text("Shared and unique upregulated AD vs non-AD DEGs across APOE3/3 cell types",x = 0.54, y=0.98, gp=gpar(fontsize=12))
upset(fromList(de33.list3), sets = c("Opc_down","Oli_down", "Neu_down", "Mic_down", "Ast_down" ), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 2.25))
grid.text("Shared and unique downregulated AD vs non-AD DEGs across APOE3/3 cell types",x = 0.54, y=0.98, gp=gpar(fontsize=12))
de34.list1<-list(Mic = de34_markers$gene[de34_markers$cell_type == "Mic"],
Ast= de34_markers$gene[de34_markers$cell_type == "Ast"],
Neu = de34_markers$gene[de34_markers$cell_type == "Neu"],
Oli = de34_markers$gene[de34_markers$cell_type == "Oli"],
Opc = de34_markers$gene[de34_markers$cell_type == "Opc"])
de34.list2<-list(Mic_up = de34_markers$gene[de34_markers$cell_type == "Mic" & de34_markers$dir == "pos"],
Ast_up = de34_markers$gene[de34_markers$cell_type == "Ast" & de34_markers$dir == "pos"],
Neu_up = de34_markers$gene[de34_markers$cell_type == "Neu" & de34_markers$dir == "pos"],
Oli_up = de34_markers$gene[de34_markers$cell_type == "Oli" & de34_markers$dir == "pos"],
Opc_up = de34_markers$gene[de34_markers$cell_type == "Opc" & de34_markers$dir == "pos"])
de34.list3<-list( Mic_down = de34_markers$gene[de34_markers$cell_type == "Mic" & de34_markers$dir != "pos"],
Ast_down = de34_markers$gene[de34_markers$cell_type == "Ast" & de34_markers$dir != "pos"],
Neu_down = de34_markers$gene[de34_markers$cell_type == "Neu" & de34_markers$dir != "pos"],
Oli_down = de34_markers$gene[de34_markers$cell_type == "Oli" & de34_markers$dir != "pos"],
Opc_down = de34_markers$gene[de34_markers$cell_type == "Opc" & de34_markers$dir != "pos"])
upset(fromList(de34.list1), sets = c("Opc","Oli", "Neu", "Mic", "Ast" ), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", nsets=6,sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 1.9))
grid.text("Shared and unique AD vs non-AD DEGs across APOE3/4 cell types",x = 0.54, y=0.967, gp=gpar(fontsize=13))
upset(fromList(de34.list2), sets = c("Opc_up","Oli_up", "Neu_up", "Mic_up", "Ast_up" ), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", nsets=6, sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 2.25))
grid.text("Shared and unique upregulated AD vs non-AD DEGs across APOE3/4 cell types",x = 0.54, y=0.967, gp=gpar(fontsize=13))
upset(fromList(de34.list3), sets = c("Opc_down","Oli_down", "Neu_down", "Mic_down", "Ast_down" ), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", nsets=6,sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 1.9))
grid.text("Shared and unique downregulated AD vs non-AD DEGs across APOE3/4 cell types",x = 0.54, y=0.97, gp=gpar(fontsize=13))
de33_markers<- subset(de33_markers_all, p_val_adj < 0.05 & abs(avg_logFC) > 0.25)
de34_markers<- subset(de34_markers_all, p_val_adj < 0.05 & abs(avg_logFC) > 0.25)
math_markers<- rbind(de33_markers,de34_markers)
table(math_markers$group, math_markers$dir)
##
## neg pos
## Ast33 85 19
## Ast34 40 123
## Mic33 43 17
## Mic34 14 135
## Neu33 91 9
## Neu34 32 97
## Oli33 103 19
## Oli34 48 112
## Opc33 48 14
## Opc34 52 113
gene33_count<-as.data.frame(table(de33_markers$gene))
colnames(gene33_count)<- c("gene","gene33_count")
de33_markers2<- dplyr::inner_join(de33_markers,gene33_count, by="gene")
gene34_count<-as.data.frame(table(de34_markers$gene))
colnames(gene34_count)<- c("gene","gene34_count")
de34_markers2<- dplyr::inner_join(de34_markers,gene34_count, by="gene")
#Join 33 and 34, group by celltype, then count to determine overlapping genes within cell types
colnames(de33_markers2)[12]<-"gene_count"
colnames(de34_markers2)[12]<- "gene_count"
all.de.markers<- rbind(de33_markers2, de34_markers2) #327
all.de.markers$cell_gene<- paste0(all.de.markers$cell_type,"-", all.de.markers$gene)
gene_ct_count<-as.data.frame(table(all.de.markers$cell_gene))
colnames(gene_ct_count)<- c("cell_gene", "gene_ct_count")
all.de.markers<- dplyr::inner_join(all.de.markers,gene_ct_count, by="cell_gene")
table(all.de.markers$cell_type, all.de.markers$ApoE)
##
## 33 34
## Ast 104 163
## Mic 60 149
## Neu 100 129
## Oli 122 160
## Opc 62 165
table(all.de.markers$group, all.de.markers$dir)
##
## neg pos
## Ast33 85 19
## Ast34 40 123
## Mic33 43 17
## Mic34 14 135
## Neu33 91 9
## Neu34 32 97
## Oli33 103 19
## Oli34 48 112
## Opc33 48 14
## Opc34 52 113
#Ast
ast.de.list<- list( "Ast_3/3_up" = de33_markers$gene[de33_markers$cell_type == "Ast"& de33_markers$dir == "pos"],
"Ast_3/4_up" = de34_markers$gene[de34_markers$cell_type == "Ast"& de34_markers$dir == "pos"],
"Ast_3/3_down" = de33_markers$gene[de33_markers$cell_type == "Ast"& de33_markers$dir != "pos"],
"Ast_3/4_down" = de34_markers$gene[de34_markers$cell_type == "Ast"& de34_markers$dir != "pos"])
upset(fromList(ast.de.list), sets = c("Ast_3/4_down", "Ast_3/4_up","Ast_3/3_down","Ast_3/3_up"), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 2))
grid.text("Astrocytes",x = 0.67, y=0.98, gp=gpar(fontsize=20))
#Mic
mic.de.list<- list( "Mic_3/3_up" = de33_markers$gene[de33_markers$cell_type == "Mic"& de33_markers$dir == "pos"],
"Mic_3/4_up" = de34_markers$gene[de34_markers$cell_type == "Mic"& de34_markers$dir == "pos"],
"Mic_3/3_down" = de33_markers$gene[de33_markers$cell_type == "Mic"& de33_markers$dir != "pos"],
"Mic_3/4_down" = de34_markers$gene[de34_markers$cell_type == "Mic"& de34_markers$dir != "pos"])
upset(fromList(mic.de.list), sets = c("Mic_3/4_down", "Mic_3/4_up","Mic_3/3_down","Mic_3/3_up"), keep.order = TRUE,order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label ="Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 1.9))
grid.text("Microglia",x = 0.67, y=0.98, gp=gpar(fontsize=20))
#Neu
neu.de.list<- list( "Neu_3/3_up" = de33_markers$gene[de33_markers$cell_type == "Neu"& de33_markers$dir == "pos"],
"Neu_3/4_up" = de34_markers$gene[de34_markers$cell_type == "Neu"& de34_markers$dir == "pos"],
"Neu_3/3_down" = de33_markers$gene[de33_markers$cell_type == "Neu"& de33_markers$dir != "pos"],
"Neu_3/4_down" = de34_markers$gene[de34_markers$cell_type == "Neu"& de34_markers$dir != "pos"])
upset(fromList(neu.de.list), sets = c("Neu_3/4_down","Neu_3/4_up","Neu_3/3_down","Neu_3/3_up"), keep.order = TRUE,order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 2))
grid.text("Neurons",x = 0.67, y=0.98, gp=gpar(fontsize=20))
#Oli
oli.de.list<- list( "Oli_3/3_up" = de33_markers$gene[de33_markers$cell_type == "Oli"& de33_markers$dir == "pos"],
"Oli_3/4_up" = de34_markers$gene[de34_markers$cell_type == "Oli"& de34_markers$dir == "pos"],
"Oli_3/3_down" = de33_markers$gene[de33_markers$cell_type == "Oli"& de33_markers$dir != "pos"],
"Oli_3/4_down" = de34_markers$gene[de34_markers$cell_type == "Oli"& de34_markers$dir != "pos"])
upset(fromList(oli.de.list),sets = c("Oli_3/4_down","Oli_3/4_up","Oli_3/3_down","Oli_3/3_up"), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 2.25))
grid.text("Oligodendrocytes",x = 0.67, y=0.98, gp=gpar(fontsize=20))
#Opc
opc.de.list<- list( "Opc_3/3_up" = de33_markers$gene[de33_markers$cell_type == "Opc"& de33_markers$dir == "pos"],
"Opc_3/4_up" = de34_markers$gene[de34_markers$cell_type == "Opc"& de34_markers$dir == "pos"],
"Opc_3/3_down" = de33_markers$gene[de33_markers$cell_type == "Opc"& de33_markers$dir != "pos"],
"Opc_3/4_down" = de34_markers$gene[de34_markers$cell_type == "Opc"& de34_markers$dir != "pos"])
upset(fromList(opc.de.list), sets = c("Opc_3/4_down","Opc_3/4_up","Opc_3/3_down","Opc_3/3_up"), keep.order = TRUE,order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 2.25, 2.25, 2.25))
grid.text("Oligodendrocyte progenitor cells",x = 0.72, y=0.98, gp=gpar(fontsize=18))
#Ast
ast.de.list<- list( "Ast_3/3" = de33_markers$gene[de33_markers$cell_type == "Ast"],
"Ast_3/4" = de34_markers$gene[de34_markers$cell_type == "Ast"] )
upset(fromList(ast.de.list), sets = c("Ast_3/4","Ast_3/3"), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 1.9, 2.25, 2))
grid.text("Astrocytes",x = 0.67, y=0.98, gp=gpar(fontsize=20))
#Mic
mic.de.list<- list( "Mic_3/3" = de33_markers$gene[de33_markers$cell_type == "Mic"],
"Mic_3/4" = de34_markers$gene[de34_markers$cell_type == "Mic"] )
upset(fromList(mic.de.list), sets = c("Mic_3/4","Mic_3/3"), keep.order = TRUE,order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label ="Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 1.9, 2.25, 2))
grid.text("Microglia",x = 0.67, y=0.98, gp=gpar(fontsize=20))
#Neu
neu.de.list<- list( "Neu_3/3" = de33_markers$gene[de33_markers$cell_type == "Neu"],
"Neu_3/4" = de34_markers$gene[de34_markers$cell_type == "Neu"] )
upset(fromList(neu.de.list),sets = c("Neu_3/4","Neu_3/3"), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 1.9, 2.25, 2))
grid.text("Neurons",x = 0.67, y=0.98, gp=gpar(fontsize=20))
#Oli
oli.de.list<- list( "Oli_3/3" = de33_markers$gene[de33_markers$cell_type == "Oli"],
"Oli_3/4" = de34_markers$gene[de34_markers$cell_type == "Oli"] )
upset(fromList(oli.de.list), sets = c("Oli_3/4","Oli_3/3"), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 1.9, 2.25, 2.25))
grid.text("Oligodendrocytes",x = 0.67, y=0.98, gp=gpar(fontsize=20))
#Opc
opc.de.list<- list( "Opc_3/3" = de33_markers$gene[de33_markers$cell_type == "Opc"],
"Opc_3/4" = de34_markers$gene[de34_markers$cell_type == "Opc"] )
upset(fromList(opc.de.list), sets = c("Opc_3/4","Opc_3/3"), keep.order = TRUE, order.by = "freq", mainbar.y.label = "Gene Intersections", sets.x.label = "Number of DEGs", text.scale = c(2.25, 2.25, 1.9, 1.9, 2.25, 2.25))
grid.text("Oligodendrocyte progenitor cells",x = 0.71, y=0.98, gp=gpar(fontsize=18))